library(reticulate)
library(ggplot2)
library(forecast)
## Registered S3 method overwritten by 'quantmod':
##   method            from
##   as.zoo.data.frame zoo
library(astsa) 
## 
## Attaching package: 'astsa'
## The following object is masked from 'package:forecast':
## 
##     gas
library(gridExtra)
library(xts)
## Loading required package: zoo
## 
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric
library(tseries)
library(fpp2)
## ── Attaching packages ────────────────────────────────────────────── fpp2 2.5 ──
## ✔ fma       2.5     ✔ expsmooth 2.3
## 
## 
## Attaching package: 'fpp2'
## The following object is masked from 'package:astsa':
## 
##     oil
library(fma)
library(lubridate)
## 
## Attaching package: 'lubridate'
## The following objects are masked from 'package:base':
## 
##     date, intersect, setdiff, union
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr   1.1.3     ✔ stringr 1.5.0
## ✔ forcats 1.0.0     ✔ tibble  3.2.1
## ✔ purrr   1.0.2     ✔ tidyr   1.3.0
## ✔ readr   2.1.4
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::combine() masks gridExtra::combine()
## ✖ dplyr::filter()  masks stats::filter()
## ✖ dplyr::first()   masks xts::first()
## ✖ dplyr::lag()     masks stats::lag()
## ✖ dplyr::last()    masks xts::last()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(TSstudio)
library(quantmod)
## Loading required package: TTR
library(tidyquant)
## Loading required package: PerformanceAnalytics
## 
## Attaching package: 'PerformanceAnalytics'
## 
## The following object is masked from 'package:graphics':
## 
##     legend
library(plotly)
## 
## Attaching package: 'plotly'
## 
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## 
## The following object is masked from 'package:stats':
## 
##     filter
## 
## The following object is masked from 'package:graphics':
## 
##     layout
library(ggplot2)
library(lubridate)
library(plotly)
library(TTR) # For the SMA function

Calculate the Returns

# Load necessary library
library(dplyr)

# Calculate month-over-month percentage returns for each series.
# NOTE: the data frame stacks several metropolitan areas on top of each
# other, so lag() must be taken WITHIN each area (.by = Metropolitan.Area).
# Without grouping, the first row of every area is differenced against the
# last row of the previous area, producing spurious returns (e.g. the San
# Jose series starts with a 38.7% / 45.5% "return" instead of NA).
data <- df8 %>%
  mutate(
    Home_Value_Return = (Mean.Home.Value - lag(Mean.Home.Value)) / lag(Mean.Home.Value) * 100,
    Rental_Price_Return = (mean - lag(mean)) / lag(mean) * 100,
    Sale_Price_Return = (Mean.Sale.Price - lag(Mean.Sale.Price)) / lag(Mean.Sale.Price) * 100,
    .by = Metropolitan.Area
  )
# Display the results
data$Sale_Price_Return[2:10]
## [1]  0.4467951 -1.3462815 -1.4104318 -0.3808042  0.3692401  0.4034246 -0.3072312
## [8]  1.5004562  2.9241200
data$Home_Value_Return[2:10]
## [1] 0.42554184 0.28263467 0.22048800 0.41534029 0.52144274 0.55207081 0.43885975
## [8] 0.25639865 0.09774601
data$Rental_Price_Return[2:10]
## [1]  0.3896107 -0.2107879 -0.5601882 -0.1000047 -0.2900896  0.6765544  1.3068816
## [8]  2.0381844  1.4859680
length(data$Sale_Price_Return)
## [1] 1080
length(data$Home_Value_Return)
## [1] 1080
length(data$Rental_Price_Return)
## [1] 1080
head(data)
##         date Mean.Sale.Price    state Mean.Home.Value     mean
## 1 2018-08-31          394588 Bend, OR        381885.7 1489.368
## 2 2018-09-30          396351 Bend, OR        383510.8 1495.171
## 3 2018-10-31          391015 Bend, OR        384594.7 1492.019
## 4 2018-11-30          385500 Bend, OR        385442.7 1483.661
## 5 2018-12-31          384032 Bend, OR        387043.6 1482.177
## 6 2019-01-31          385450 Bend, OR        389061.8 1477.878
##   Metropolitan.Area Home_Value_Return Rental_Price_Return Sale_Price_Return
## 1          Bend, OR                NA                  NA                NA
## 2          Bend, OR         0.4255418           0.3896107         0.4467951
## 3          Bend, OR         0.2826347          -0.2107879        -1.3462815
## 4          Bend, OR         0.2204880          -0.5601882        -1.4104318
## 5          Bend, OR         0.4153403          -0.1000047        -0.3808042
## 6          Bend, OR         0.5214427          -0.2900896         0.3692401
# Restrict the analysis to the San Jose, CA metropolitan area.
san_jose_data <- filter(data, Metropolitan.Area == "San Jose, CA")
head(san_jose_data)
##         date Mean.Sale.Price        state Mean.Home.Value     mean
## 1 2018-08-31         1138521 San Jose, CA         1158784 2932.893
## 2 2018-09-30         1121256 San Jose, CA         1169495 2929.350
## 3 2018-10-31         1133715 San Jose, CA         1177582 2918.253
## 4 2018-11-30         1104818 San Jose, CA         1180853 2900.606
## 5 2018-12-31         1081306 San Jose, CA         1179078 2886.816
## 6 2019-01-31         1060308 San Jose, CA         1169053 2889.411
##   Metropolitan.Area Home_Value_Return Rental_Price_Return Sale_Price_Return
## 1      San Jose, CA        38.6990930         -0.57535478         45.463315
## 2      San Jose, CA         0.9243363         -0.12078553         -1.516441
## 3      San Jose, CA         0.6914401         -0.37882682          1.111165
## 4      San Jose, CA         0.2778182         -0.60471865         -2.548877
## 5      San Jose, CA        -0.1503518         -0.47541551         -2.128133
## 6      San Jose, CA        -0.8501966          0.08988467         -1.941911

Plot For Rental, Sale, Home Values For San Jose, CA

# Interactive time-series plot of the mean rental price.
rental_plot <- san_jose_data %>%
  plot_ly(x = ~date) %>%
  add_trace(y = ~mean, name = 'Mean Rental Price', mode = 'lines')
rental_plot
# Interactive time-series plot of mean sale price and mean home value.
price_plot <- san_jose_data %>%
  plot_ly(x = ~date) %>%
  add_trace(y = ~Mean.Sale.Price, name = 'Mean Sale Price', mode = 'lines') %>%
  add_trace(y = ~Mean.Home.Value, name = 'Mean Home Value', mode = 'lines')
price_plot

Stationarity

The plot shows a clear upward trend in both Mean Sale Price and Mean Home Value over time until what appears to be a sharp drop in the most recent data point. This trend indicates that the series is likely non-stationary because the mean of the series is changing over time. The presence of a trend is a strong indication that at least the mean is not constant. The drop at the end could be due to the Covid-19 pandemic, an extreme value, or a real market crash.

## Volatility

The series seems to exhibit periods of different volatility levels. Initially, there is some fluctuation, but the variations appear relatively stable and small. However, the spike at the end of the series suggests a sudden increase in volatility. Volatility in financial time series is often clustered; periods of high volatility are followed by periods of high volatility, and periods of low volatility are followed by periods of low volatility. This plot suggests such clustering might be present, although the spike at the end may skew this perception.

Return Plots

library(ggplot2)

# Overlay the three return series (sale price, home value, rental price)
# on one time-series plot to compare their volatility.
return_plot <- ggplot(san_jose_data, aes(x = date)) +
  geom_line(aes(y = Sale_Price_Return), color = "blue") +
  geom_line(aes(y = Home_Value_Return), color = "red") +
  geom_line(aes(y = Rental_Price_Return), color = "green") +
  labs(title = "Time Series Plot Of Returns", x = "Date", y = "Value") +
  theme_minimal()
return_plot

Volatility

The series seems to exhibit periods of different volatility levels. Initially, there is some fluctuation, but the variations appear relatively stable and small. However, the spike at the end of the series suggests a sudden increase in volatility. Volatility in financial time series is often clustered; periods of high volatility are followed by periods of high volatility, and periods of low volatility are followed by periods of low volatility. This plot suggests such clustering might be present, although the spike at the end may skew this perception. The conclusion is the same as the above plots.

# Work on a copy of the San Jose series: parse the dates and drop the
# NA first-return row so downstream models see complete cases only.
df1 <- san_jose_data
df1$date <- as.Date(df1$date, format = "%Y-%m-%d")
df1 <- na.omit(df1)
str(df1)
## 'data.frame':    54 obs. of  9 variables:
##  $ date               : Date, format: "2018-08-31" "2018-09-30" ...
##  $ Mean.Sale.Price    : num  1138521 1121256 1133715 1104818 1081306 ...
##  $ state              : chr  "San Jose, CA" "San Jose, CA" "San Jose, CA" "San Jose, CA" ...
##  $ Mean.Home.Value    : num  1158784 1169495 1177582 1180853 1179078 ...
##  $ mean               : num  2933 2929 2918 2901 2887 ...
##  $ Metropolitan.Area  : chr  "San Jose, CA" "San Jose, CA" "San Jose, CA" "San Jose, CA" ...
##  $ Home_Value_Return  : num  38.699 0.924 0.691 0.278 -0.15 ...
##  $ Rental_Price_Return: num  -0.575 -0.121 -0.379 -0.605 -0.475 ...
##  $ Sale_Price_Return  : num  45.46 -1.52 1.11 -2.55 -2.13 ...
summary(df1)
##       date            Mean.Sale.Price      state           Mean.Home.Value  
##  Min.   :2018-08-31   Min.   :1048681   Length:54          Min.   : 252710  
##  1st Qu.:2019-10-07   1st Qu.:1085063   Class :character   1st Qu.:1107782  
##  Median :2020-11-15   Median :1178133   Mode  :character   Median :1178330  
##  Mean   :2020-11-14   Mean   :1207943                      Mean   :1216959  
##  3rd Qu.:2021-12-23   3rd Qu.:1327534                      3rd Qu.:1332424  
##  Max.   :2023-01-31   Max.   :1476921                      Max.   :1522605  
##       mean      Metropolitan.Area  Home_Value_Return  Rental_Price_Return
##  Min.   :2722   Length:54          Min.   :-83.0549   Min.   :-1.6291    
##  1st Qu.:2901   Class :character   1st Qu.: -0.4637   1st Qu.:-0.5509    
##  Median :2968   Mode  :character   Median :  0.6393   Median : 0.1203    
##  Mean   :2969                      Mean   :  8.4294   Mean   : 0.1440    
##  3rd Qu.:3000                      3rd Qu.:  1.3725   3rd Qu.: 0.7496    
##  Max.   :3268                      Max.   :475.4908   Max.   : 1.9657    
##  Sale_Price_Return
##  Min.   :-2.6579  
##  1st Qu.:-0.7631  
##  Median : 0.5676  
##  Mean   : 1.1378  
##  3rd Qu.: 1.3439  
##  Max.   :45.4633

First Fit a Linear Model

I think that the home value can be explained by the sale price and the rental price.

library(caret)
## Loading required package: lattice
## 
## Attaching package: 'caret'
## The following object is masked from 'package:purrr':
## 
##     lift
set.seed(236) # for reproducibility
# 80/20 train/test split, stratified on the response by caret.
# NOTE(review): a random partition of a time series mixes past and future
# rows; a chronological split may be more appropriate — confirm intent.
split_idx <- createDataPartition(df1$Mean.Home.Value, p = 0.8, list = FALSE)
train <- df1[split_idx, ]
test <- df1[-split_idx, ]
head(train)
##         date Mean.Sale.Price        state Mean.Home.Value     mean
## 1 2018-08-31         1138521 San Jose, CA         1158784 2932.893
## 2 2018-09-30         1121256 San Jose, CA         1169495 2929.350
## 3 2018-10-31         1133715 San Jose, CA         1177582 2918.253
## 5 2018-12-31         1081306 San Jose, CA         1179078 2886.816
## 7 2019-02-28         1060675 San Jose, CA         1152091 2903.899
## 9 2019-04-30         1071541 San Jose, CA         1114588 2942.314
##   Metropolitan.Area Home_Value_Return Rental_Price_Return Sale_Price_Return
## 1      San Jose, CA        38.6990930          -0.5753548       45.46331479
## 2      San Jose, CA         0.9243363          -0.1207855       -1.51644107
## 3      San Jose, CA         0.6914401          -0.3788268        1.11116462
## 5      San Jose, CA        -0.1503518          -0.4754155       -2.12813332
## 7      San Jose, CA        -1.4509413           0.5014357        0.03461258
## 9      San Jose, CA        -1.5732876           0.5401285       -0.31842927
# Linear model and check coefficients
# Regress mean home value on mean sale price and mean rental price
# (the column literally named "mean"), fitted on the training split.
model <- lm(Mean.Home.Value ~ Mean.Sale.Price + mean, data = train)
summary(model)
## 
## Call:
## lm(formula = Mean.Home.Value ~ Mean.Sale.Price + mean, data = train)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -44779 -23485  -3919  16998  86591 
## 
## Coefficients:
##                   Estimate Std. Error t value Pr(>|t|)    
## (Intercept)     -6.369e+05  9.253e+04  -6.883  1.9e-08 ***
## Mean.Sale.Price  9.886e-01  3.863e-02  25.594  < 2e-16 ***
## mean             2.288e+02  3.582e+01   6.387  1.0e-07 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 28990 on 43 degrees of freedom
## Multiple R-squared:  0.9637, Adjusted R-squared:  0.962 
## F-statistic: 570.3 on 2 and 43 DF,  p-value: < 2.2e-16

The coefficients are significant

library(car)
## Loading required package: carData
## 
## Attaching package: 'car'
## The following object is masked from 'package:dplyr':
## 
##     recode
## The following object is masked from 'package:purrr':
## 
##     some
# Variance inflation factors: check for multicollinearity between predictors.
vif(model)
## Mean.Sale.Price            mean 
##        1.328619        1.328619

We can see that the mean sale price and rental price have relatively low VIF scores. Therefore, we can keep both of them in the model:

Evaluations of the model

# Out-of-sample R^2: predictions on the held-out test set, with the
# training-set mean of the response as the baseline predictor.
predictions <- predict(model, newdata = test)
sse <- sum((predictions - test$Mean.Home.Value)^2)
sst <- sum((test$Mean.Home.Value - mean(train$Mean.Home.Value))^2)
rsquared_test <- 1 - sse / sst
rsquared_test
## [1] -0.3538088

Prepare to fit the model and get residuals

# Extract the linear-model residuals and plot them against the response
# to eyeball heteroskedasticity / nonlinearity.
lm.residuals <- residuals(model)
plot(train$Mean.Home.Value, lm.residuals, xlab = "The Home Values", ylab = "Residuals", main = "Residuals VS Home Values")

check correlation in these residuals using an ACF plot

# ACF of the linear-model residuals: remaining autocorrelation motivates
# modelling the errors with an ARMA structure.
acf(lm.residuals, main = "ACF of Model Residuals")

# Partial ACF (title fixed: the original said "ACF" on the PACF plot).
Pacf(lm.residuals, main = "PACF of Model Residuals")

Based on the ACF and PACF, we can choose p = 0, 1, 2, 3 and q = 0, 1, 2.

library(tseries)
# Augmented Dickey-Fuller test: null = unit root (non-stationary) in the
# linear-model residual series.
adf.test(lm.residuals)
## Warning in adf.test(lm.residuals): p-value smaller than printed p-value
## 
##  Augmented Dickey-Fuller Test
## 
## data:  lm.residuals
## Dickey-Fuller = -5.3164, Lag order = 3, p-value = 0.01
## alternative hypothesis: stationary

We can see that there is little correlation left. The linear-model residuals are stationary now.

Fit an appropriate AR+ARCH/ARMA+GARCH or ARIMA-ARCH/GARCH

First, determine the ARIMA model using model diagnostic

# Reference from the lab 6 part 1 demo:
# Grid search over ARIMA(p, d, q) orders on the differenced residuals,
# keeping every combination with p + d + q <= 8 and recording AIC/BIC/AICc.
temp.ts <- diff(lm.residuals)

# d varies fastest, then q, then p — same row order as the original
# triple loop. (expand.grid varies its first argument fastest.)
orders <- expand.grid(d = 0:2, q = 0:4, p = 0:4)
orders <- orders[orders$p + orders$d + orders$q <= 8, ]

# Preallocate the results matrix instead of a hard-coded 71-row matrix
# named `ls` (which shadowed base::ls).
results <- matrix(NA_real_, nrow = nrow(orders), ncol = 6)
for (i in seq_len(nrow(orders))) {
  fit <- Arima(temp.ts,
               order = c(orders$p[i], orders$d[i], orders$q[i]),
               include.drift = FALSE)
  results[i, ] <- c(orders$p[i], orders$d[i], orders$q[i],
                    fit$aic, fit$bic, fit$aicc)
}

temp <- as.data.frame(results)
names(temp) <- c("p", "d", "q", "AIC", "BIC", "AICc")

knitr::kable(temp)
p d q AIC BIC AICc
0 0 0 1018.4226 1022.0359 1018.7083
0 1 0 1009.6378 1011.4220 1009.7330
0 2 0 1025.8850 1027.6462 1025.9825
0 0 1 1016.5076 1021.9276 1017.0930
0 1 1 1000.6753 1004.2437 1000.9680
0 2 1 993.4236 996.9460 993.7236
0 0 2 1018.5069 1025.7335 1019.5069
0 1 2 998.2999 1003.6525 998.8999
0 2 2 990.6684 995.9520 991.2838
0 0 3 1017.5903 1026.6236 1019.1287
0 1 3 1000.2669 1007.4037 1001.2926
0 2 3 986.4701 993.5149 987.5227
0 0 4 1016.5781 1027.4181 1018.7887
0 1 4 1002.2665 1011.1875 1003.8455
0 2 4 988.3123 997.1183 989.9339
1 0 0 1016.9109 1022.3309 1017.4963
1 1 0 1008.4584 1012.0267 1008.7510
1 2 0 1014.1853 1017.7077 1014.4853
1 0 1 1018.5071 1025.7337 1019.5071
1 1 1 998.5628 1003.9154 999.1628
1 2 1 992.7852 998.0688 993.4006
1 0 2 1020.5035 1029.5368 1022.0420
1 1 2 1000.2698 1007.4066 1001.2955
1 2 2 986.4761 993.5209 987.5288
1 0 3 1022.5051 1033.3451 1024.7156
1 1 3 1002.2719 1011.1928 1003.8508
1 2 3 988.3002 997.1062 989.9218
1 0 4 1016.4342 1029.0808 1019.4612
1 1 4 1000.0170 1010.7222 1002.2873
1 2 4 990.2964 1000.8636 992.6297
2 0 0 1018.3024 1025.5291 1019.3024
2 1 0 1007.2109 1012.5634 1007.8109
2 2 0 1003.4441 1008.7277 1004.0595
2 0 1 1011.8179 1020.8512 1013.3564
2 1 1 1000.1622 1007.2989 1001.1878
2 2 1 992.0109 999.0557 993.0636
2 0 2 1012.0151 1022.8551 1014.2256
2 1 2 996.1185 1005.0394 997.6974
2 2 2 988.2488 997.0548 989.8704
2 0 3 1013.9075 1026.5542 1016.9346
2 1 3 1004.2690 1014.9741 1006.5392
2 2 3 993.1486 1003.7158 995.4819
2 0 4 1019.3691 1033.8224 1023.3691
2 1 4 1003.1896 1015.6789 1006.3007
2 2 4 992.1647 1004.4931 995.3647
3 0 0 1019.4057 1028.4390 1020.9442
3 1 0 1009.1645 1016.3012 1010.1901
3 2 0 1000.6750 1007.7198 1001.7276
3 0 1 1012.9106 1023.7506 1015.1211
3 1 1 1001.5913 1010.5123 1003.1703
3 2 1 994.0092 1002.8152 995.6308
3 0 2 1023.0482 1035.6949 1026.0752
3 1 2 997.1656 1007.8707 999.4359
3 2 2 989.8264 1000.3936 992.1597
3 0 3 1022.6168 1037.0701 1026.6168
3 1 3 1004.6833 1017.1727 1007.7944
3 2 3 996.5815 1008.9099 999.7815
3 0 4 1023.9626 1040.2226 1029.1055
3 1 4 1004.1571 1018.4306 1008.2714
4 0 0 1017.0838 1027.9238 1019.2943
4 1 0 1010.6922 1019.6132 1012.2712
4 2 0 1002.5527 1011.3587 1004.1743
4 0 1 1012.2330 1024.8796 1015.2600
4 1 1 999.8885 1010.5936 1002.1588
4 2 1 995.7996 1006.3668 998.1329
4 0 2 1014.2329 1028.6862 1018.2329
4 1 2 997.2519 1009.7412 1000.3630
4 2 2 997.3972 1009.7256 1000.5972
4 0 3 1013.1322 1029.3921 1018.2750
4 1 3 999.2231 1013.4966 1003.3374
4 0 4 1014.6811 1032.7478 1021.1517

Evaluations using AIC, BIC, and AICc

# The same order minimizes all three information criteria, so the model
# choice is unambiguous.
temp[which.min(temp$AIC),]
##    p d q      AIC      BIC     AICc
## 12 0 2 3 986.4701 993.5149 987.5227
temp[which.min(temp$BIC),]
##    p d q      AIC      BIC     AICc
## 12 0 2 3 986.4701 993.5149 987.5227
temp[which.min(temp$AICc),]
##    p d q      AIC      BIC     AICc
## 12 0 2 3 986.4701 993.5149 987.5227

We can see that for (0,2,3), the AIC, BIC, and AICc are all the smallest. Therefore we can use ARIMA(0,2,3)

Fit the best one ARIMA(0,2,3)

# Refit the selected order with forecast::Arima() — the same function used
# in the grid search above — so the reported AIC matches the selection
# table (the original used stats::arima() here, inconsistently).
fit2 <- Arima(temp.ts, order = c(0,2,3), include.drift = FALSE)
summary(fit2)
## 
## Call:
## arima(x = temp.ts, order = c(0, 2, 3))
## 
## Coefficients:
##           ma1     ma2     ma3
##       -1.6336  0.2880  0.3576
## s.e.   0.1825  0.3108  0.1569
## 
## sigma^2 estimated as 355176185:  log likelihood = -489.24,  aic = 986.47
## 
## Training set error measures:
##                    ME     RMSE     MAE      MPE     MAPE      MASE       ACF1
## Training set 4158.771 18422.56 13947.6 101.5043 123.1333 0.8312767 0.03776277

get the residuals of the arima model

# Residual diagnostics for the fitted ARIMA(0,2,3).
arima.res <- residuals(fit2)
# Plot the residuals
plot(arima.res, main = "Residuals of ARIMA Model")

# ACF plot of the residuals
acf(arima.res, main = "ACF of ARIMA Model Residuals")

# PACF plot of the residuals
Pacf(arima.res, main = "PACF of ARIMA Model Residuals")

# NOTE(review): this block is an exact duplicate of the diagnostics above
# and produces identical plots; it could be removed without loss.
arima.res <- residuals(fit2)
# Plot the residuals
plot(arima.res, main = "Residuals of ARIMA Model")

# ACF plot of the residuals
acf(arima.res, main = "ACF of ARIMA Model Residuals")

# PACF plot of the residuals
Pacf(arima.res, main = "PACF of ARIMA Model Residuals")

We can see that the residuals are mostly stationary; however, the ARIMA model residuals show some fluctuations. In this case, I think that we can try to fit an ARCH/GARCH model.

Therefore, I think we can extend the analysis by fitting GARCH models to see whether we should use one.

Model Diagnostics For ARCH/GARCH model

# ARCH-effect check: autocorrelation in the SQUARED residuals would
# indicate conditional heteroskedasticity worth modelling with ARCH/GARCH.
arima.res <- residuals(fit2)
squared.res <- arima.res^2

# Plot the squared residuals
plot(squared.res, main = "Residuals of Squared ARIMA Model")

# ACF plot of the squared residuals
acf(squared.res, main = "ACF of Squared Residuals")

# PACF plot of the squared residuals
Pacf(squared.res, main = "PACF of Squared Residuals")

We can see that it is actually already sufficient to use the ARIMA model alone. A GARCH model is not required, since the ARIMA fit is already quite good at removing correlations. The residuals and squared residuals are stationary.

However, I still think that I can at least try to make a further diagnostic and compare the results to verify that I do not need to fit GARCH model.

# Standardize the ARIMA residuals before fitting candidate GARCH models.
mean_res <- mean(arima.res, na.rm = TRUE)
sd_res <- sd(arima.res, na.rm = TRUE)

# Normalize the residuals
arima.res <- (arima.res - mean_res) / sd_res

# Grid search over GARCH orders with tseries::garch().
# NOTE: tseries::garch() takes order = c(GARCH order, ARCH order), so
# order = c(q, p) here fits q GARCH (beta) terms and p ARCH (alpha) terms.
model <- list() ## set counter
cc <- 1
for (p in 1:7) {
  for (q in 1:7) {
    model[[cc]] <- garch(arima.res, order = c(q, p), trace = FALSE)
    cc <- cc + 1
  }
}

## get AIC values for model evaluation
## (vapply pins the return type; which.min replaces which(x == min(x)))
GARCH_AIC <- vapply(model, AIC, numeric(1)) ## model with lowest AIC is the best
which.min(GARCH_AIC)
model[[which.min(GARCH_AIC)]]
## 
## Call:
## garch(x = arima.res, order = c(q, p), trace = F)
## 
## Coefficient(s):
##        a0         a1         a2         a3         a4         a5         a6  
## 5.604e-01  2.481e-02  4.198e-16  2.442e-02  1.949e-02  2.519e-02  1.171e-02  
##        a7         b1  
## 3.297e-02  1.236e-02

From here, I think that garch(7,1) is a good choice

library(fGarch)
## NOTE: Packages 'fBasics', 'timeDate', and 'timeSeries' are no longer
## attached to the search() path when 'fGarch' is attached.
## 
## If needed attach them yourself in your R script by e.g.,
##         require("timeSeries")
## 
## Attaching package: 'fGarch'
## The following object is masked from 'package:TTR':
## 
##     volatility
# Refit the grid-search winner with fGarch for full inference.
# In fGarch notation, garch(7, 1) has 7 ARCH (alpha) and 1 GARCH (beta) term.
summary(garchFit(~garch(7,1), arima.res,trace = F)) 
## 
## Title:
##  GARCH Modelling 
## 
## Call:
##  garchFit(formula = ~garch(7, 1), data = arima.res, trace = F) 
## 
## Mean and Variance Equation:
##  data ~ garch(7, 1)
## <environment: 0x000001df3a759b68>
##  [data = arima.res]
## 
## Conditional Distribution:
##  norm 
## 
## Coefficient(s):
##         mu       omega      alpha1      alpha2      alpha3      alpha4  
## 1.4487e-16  1.0201e-01  1.0000e-08  1.0000e-08  1.0000e-08  1.0000e-08  
##     alpha5      alpha6      alpha7       beta1  
## 2.5669e-02  1.0000e-08  1.0000e-08  7.9655e-01  
## 
## Std. Errors:
##  based on Hessian 
## 
## Error Analysis:
##         Estimate  Std. Error  t value Pr(>|t|)
## mu     1.449e-16   1.251e-01    0.000    1.000
## omega  1.020e-01   8.867e-02    1.150    0.250
## alpha1 1.000e-08   1.475e-01    0.000    1.000
## alpha2 1.000e-08         NaN      NaN      NaN
## alpha3 1.000e-08         NaN      NaN      NaN
## alpha4 1.000e-08   2.325e-01    0.000    1.000
## alpha5 2.567e-02   1.627e-01    0.158    0.875
## alpha6 1.000e-08   1.617e-01    0.000    1.000
## alpha7 1.000e-08         NaN      NaN      NaN
## beta1  7.966e-01         NaN      NaN      NaN
## 
## Log Likelihood:
##  -61.66195    normalized:  -1.370265 
## 
## Description:
##  Mon Nov 20 16:55:00 2023 by user: yzh20 
## 
## 
## Standardised Residuals Tests:
##                                  Statistic   p-Value
##  Jarque-Bera Test   R    Chi^2   1.7044558 0.4264637
##  Shapiro-Wilk Test  R    W       0.9824764 0.7206835
##  Ljung-Box Test     R    Q(10)   4.1291394 0.9413333
##  Ljung-Box Test     R    Q(15)   6.6504841 0.9666331
##  Ljung-Box Test     R    Q(20)   9.1194863 0.9814815
##  Ljung-Box Test     R^2  Q(10)   7.0865928 0.7172456
##  Ljung-Box Test     R^2  Q(15)   7.5672814 0.9399582
##  Ljung-Box Test     R^2  Q(20)   9.8514655 0.9707860
##  LM Arch Test       R    TR^2   10.1871278 0.5995480
## 
## Information Criterion Statistics:
##      AIC      BIC      SIC     HQIC 
## 3.184975 3.586456 3.108256 3.334643

The results shows that (7,1) is not an optimal fit. The coefficients are not significant.

Therefore, let us try (1,0)

Try to compare with Garch(1,0)

# Simpler candidate: pure ARCH(1) — fGarch garch(1, 0): one alpha, no beta.
summary(garchFit(~garch(1,0), arima.res,trace = F)) 
## 
## Title:
##  GARCH Modelling 
## 
## Call:
##  garchFit(formula = ~garch(1, 0), data = arima.res, trace = F) 
## 
## Mean and Variance Equation:
##  data ~ garch(1, 0)
## <environment: 0x000001df3d7bf820>
##  [data = arima.res]
## 
## Conditional Distribution:
##  norm 
## 
## Coefficient(s):
##         mu       omega      alpha1  
## 1.4487e-16  6.6975e-01  2.8441e-01  
## 
## Std. Errors:
##  based on Hessian 
## 
## Error Analysis:
##         Estimate  Std. Error  t value Pr(>|t|)    
## mu     1.449e-16   1.371e-01    0.000 1.000000    
## omega  6.698e-01   1.777e-01    3.770 0.000163 ***
## alpha1 2.844e-01   1.987e-01    1.431 0.152410    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Log Likelihood:
##  -60.76395    normalized:  -1.35031 
## 
## Description:
##  Mon Nov 20 16:55:00 2023 by user: yzh20 
## 
## 
## Standardised Residuals Tests:
##                                  Statistic   p-Value
##  Jarque-Bera Test   R    Chi^2   0.7535330 0.6860762
##  Shapiro-Wilk Test  R    W       0.9856748 0.8442263
##  Ljung-Box Test     R    Q(10)   2.2865882 0.9936344
##  Ljung-Box Test     R    Q(15)   5.6662238 0.9848256
##  Ljung-Box Test     R    Q(20)   6.9886210 0.9967222
##  Ljung-Box Test     R^2  Q(10)   5.4594833 0.8584504
##  Ljung-Box Test     R^2  Q(15)   6.0592777 0.9787347
##  Ljung-Box Test     R^2  Q(20)   8.1994259 0.9904640
##  LM Arch Test       R    TR^2   10.8798294 0.5392445
## 
## Information Criterion Statistics:
##      AIC      BIC      SIC     HQIC 
## 2.833953 2.954397 2.825783 2.878854

Now, it seems that alpha1 is still not significant. Therefore, in this case, we can stop with the ARIMA model. No additional GARCH is needed.

However, we still can compare the prediction fits.

Forecast Comparison

ARIMA (0,2,3)

# Autoplot with custom colors: 10-step-ahead ARIMA forecast on a grey canvas.
panel_grey <- "#E0E0E0"
plot_fit_ <- autoplot(forecast(fit2, 10)) +
  theme(
    panel.background = element_rect(fill = panel_grey, color = panel_grey),
    plot.background = element_rect(fill = panel_grey, color = panel_grey),
    legend.background = element_rect(fill = panel_grey, color = panel_grey),
    legend.key = element_rect(fill = panel_grey, color = panel_grey)
  )

# Display the plot
print(plot_fit_)

ARIMA (0,2,3) + Garch(1,0)

# Keep the ARCH(1) fit as final.fit so it can be used for forecasting below.
summary(final.fit <- garchFit(~garch(1,0), arima.res,trace = F)) 
## 
## Title:
##  GARCH Modelling 
## 
## Call:
##  garchFit(formula = ~garch(1, 0), data = arima.res, trace = F) 
## 
## Mean and Variance Equation:
##  data ~ garch(1, 0)
## <environment: 0x000001df32277168>
##  [data = arima.res]
## 
## Conditional Distribution:
##  norm 
## 
## Coefficient(s):
##         mu       omega      alpha1  
## 1.4487e-16  6.6975e-01  2.8441e-01  
## 
## Std. Errors:
##  based on Hessian 
## 
## Error Analysis:
##         Estimate  Std. Error  t value Pr(>|t|)    
## mu     1.449e-16   1.371e-01    0.000 1.000000    
## omega  6.698e-01   1.777e-01    3.770 0.000163 ***
## alpha1 2.844e-01   1.987e-01    1.431 0.152410    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Log Likelihood:
##  -60.76395    normalized:  -1.35031 
## 
## Description:
##  Mon Nov 20 16:55:01 2023 by user: yzh20 
## 
## 
## Standardised Residuals Tests:
##                                  Statistic   p-Value
##  Jarque-Bera Test   R    Chi^2   0.7535330 0.6860762
##  Shapiro-Wilk Test  R    W       0.9856748 0.8442263
##  Ljung-Box Test     R    Q(10)   2.2865882 0.9936344
##  Ljung-Box Test     R    Q(15)   5.6662238 0.9848256
##  Ljung-Box Test     R    Q(20)   6.9886210 0.9967222
##  Ljung-Box Test     R^2  Q(10)   5.4594833 0.8584504
##  Ljung-Box Test     R^2  Q(15)   6.0592777 0.9787347
##  Ljung-Box Test     R^2  Q(20)   8.1994259 0.9904640
##  LM Arch Test       R    TR^2   10.8798294 0.5392445
## 
## Information Criterion Statistics:
##      AIC      BIC      SIC     HQIC 
## 2.833953 2.954397 2.825783 2.878854
# 10-step-ahead forecast of the standardized residuals with prediction bands.
predict(final.fit, n.ahead = 10, plot=TRUE)

##    meanForecast meanError standardDeviation lowerInterval upperInterval
## 1  1.448735e-16 1.1405784         1.1405784     -2.235493      2.235493
## 2  1.448735e-16 1.0196832         1.0196832     -1.998542      1.998542
## 3  1.448735e-16 0.9825858         0.9825858     -1.925833      1.925833
## 4  1.448735e-16 0.9717762         0.9717762     -1.904646      1.904646
## 5  1.448735e-16 0.9686797         0.9686797     -1.898577      1.898577
## 6  1.448735e-16 0.9677973         0.9677973     -1.896848      1.896848
## 7  1.448735e-16 0.9675461         0.9675461     -1.896356      1.896356
## 8  1.448735e-16 0.9674747         0.9674747     -1.896216      1.896216
## 9  1.448735e-16 0.9674544         0.9674544     -1.896176      1.896176
## 10 1.448735e-16 0.9674486         0.9674486     -1.896164      1.896164

By comparing the fits, I think that the ARIMA model alone is better. We do not need the GARCH in my case.

Final model: ARIMA (0,2,3) and equation

Box Ljung Test

# Ljung-Box test on the ARIMA residuals for remaining autocorrelation.
# fitdf = 3 discounts the three estimated MA coefficients of ARIMA(0,2,3)
# from the chi-squared degrees of freedom (df = lag - fitdf); the bare
# call omitted this adjustment, overstating the p-value slightly.
box_ljung_test <- Box.test(arima.res, lag = 10, type = "Ljung-Box", fitdf = 3)

# Display the test results
box_ljung_test
## 
##  Box-Ljung test
## 
## data:  arima.res
## X-squared = 4.4131, df = 10, p-value = 0.9268

The p-value is above the significance level of 0.05; therefore, I do not reject the null hypothesis. This suggests that the residuals do not exhibit autocorrelation and that the model is adequate. This conclusion aligns with my model diagnostics and forecasting comparison. The ARIMA(0,2,3) model alone is enough in my case.

Equation

The ARIMA(0,2,3) model is defined as:

\[ (1 - B)^2 X_t = (1 + \theta_1B + \theta_2B^2 + \theta_3B^3)a_t \]

where: - \(B\) is the backshift operator, - \(X_t\) is the time series observation at time t, - \((1 - B)^2\) denotes the second-order differencing, - \(\theta_1, \theta_2, \theta_3\) are the parameters of the model, - \(a_t\) is the error term at time t.

No Garch components since in my case the ARIMA is better and sufficient.